---
title: "Wave 2 Analysis"
output: html_document
date: "2025-7-21"
---

# Set up
```{r}

library(dplyr)
library(AER)
library(broom)
library(ggplot2)
library(blockTools)
library(tidyverse)
library(quickblock)
library(data.table)
library(randomizr)
library(gtsummary)
library(jtools)
library(pwr)
library(stargazer)
library(broom)
library(purrr)
library(psych)
library(tables)
library(Hmisc)
library(scales)
library(table1)
library(gt)
library(xtable)

```

# Import, Clean and Merge Data

```{r}
# Load both survey waves and merge them on respondent id

# Wave 2 responses
wave2 <- read_csv("wave2_nopii.csv")

# Cleaning: on every column whose name starts with "post_electoral",
# a value of 6 is replaced by 3
post_data <- mutate(
  wave2,
  across(starts_with("post_electoral"), ~ ifelse(.x == 6, 3, .x))
)

# Wave 1 randomized list
data <- read_csv("wave1_randomized_list.csv")

# One row per wave 1 record; wave 2 columns are attached where the id matches
combined_data <- left_join(
  data,
  filter(post_data, id %in% data$id),
  by = "id"
)

# Derived flags:
#   both_surveys = 1 when a wave 2 answer (post_confidence_1) is present, else 0
#   white        = 1 when race_ethnicity is "White", else 0 (missing race -> 0)
combined_data <- combined_data %>%
  mutate(
    both_surveys = ifelse(!is.na(post_confidence_1), 1, 0),
    white = if_else(race_ethnicity == "White", 1, 0, missing = 0)
  )

# Attrition: does completing wave 2 depend on the assigned condition?
# These models have to be fit on the full wave 1 list, i.e. BEFORE the
# non-respondents are dropped. Once the data are filtered to
# both_surveys == 1 the outcome is constant and the regressions are
# degenerate (every slope is exactly 0).
m1 <- lm(both_surveys ~ condition, data = combined_data)
summary(m1)

# Attrition with model controls
m1 <- lm(
  both_surveys ~ condition + voted_trump + party_scale + education + age +
    race_ethnicity + voted_2022 + voted_2020 + voted_2018,
  data = combined_data
)
summary(m1)

# From here on keep only respondents who answered both waves
combined_data <- combined_data %>%
  filter(both_surveys == 1)


# RECODE and create inverse scales

# To facilitate clearer interpretation of models, scales are re-coded so that
# HIGHER values indicate MORE record accessibility and MORE election confidence.

combined_data <- combined_data %>%
  mutate(
    # Flip every pre/post electoral-record and confidence item with 6 - x
    # (maps 1..5 onto 5..1; assumes the items are coded 1-5 -- TODO confirm).
    # all_of() with explicit names fails loudly if an item column is missing
    # and cannot pick up unrelated columns that share a prefix.
    across(
      all_of(paste0(
        rep(
          c("electoral_record_", "confidence_",
            "post_electoral_record_", "post_confidence_"),
          each = 4
        ),
        1:4
      )),
      ~ 6 - .x
    )
  ) %>%
  mutate(
    # Indices are plain item averages; any missing item makes the index NA
    electoral_record_index = (electoral_record_1 + electoral_record_2 +
                                electoral_record_3 + electoral_record_4) / 4,
    confidence_index = (confidence_1 + confidence_2 +
                          confidence_3 + confidence_4) / 4,
    electoral_record_index_post = (post_electoral_record_1 + post_electoral_record_2 +
                                     post_electoral_record_3 + post_electoral_record_4) / 4,
    confidence_index_post = (post_confidence_1 + post_confidence_2 +
                               post_confidence_3 + post_confidence_4) / 4,
    concern_index_post = (post_impersonation + post_stealing_tampering +
                            post_non_citizen_voting + post_official_tampering +
                            post_machine_errors) / 5
  ) %>%
  mutate(
    # Standardize the indices (mean 0, SD 1). scale() returns a one-column
    # matrix carrying "scaled:*" attributes; as.numeric() turns it back into an
    # ordinary numeric vector so the tibble does not end up with matrix-columns.
    across(
      c(electoral_record_index_post, confidence_index_post, concern_index_post,
        electoral_record_index, confidence_index),
      ~ as.numeric(scale(.x, center = TRUE, scale = TRUE))
    )
  )


```

# Balance tests

```{r}

# Columns checked for balance across experimental conditions
covariates <- c(
  "age",
  "voted_trump",
  "education",
  "electoral_record_index",
  "confidence_index",
  "low_propensity_voter"
)

# Balance check for a single covariate across experimental conditions.
#
# Args:
#   combined_data: data frame holding a `condition` column and the covariate.
#   covariate:     the covariate column (the caller passes its name as a string).
#
# Returns:
#   One row per condition with the covariate `mean` (NAs removed) and a
#   `p_value` column repeating the p-value of a single test that the group
#   means are all equal.
balance_test <- function(combined_data, covariate) {
  summary <- combined_data %>%
    group_by(condition) %>%
    summarise_at(vars({{covariate}}), list(mean = ~mean(., na.rm = TRUE)))

  # Welch one-way test over ALL conditions. With exactly two conditions this
  # gives the same p-value as the Welch two-sample t-test; with three or more
  # it tests every arm instead of only the first two values of `condition`.
  values <- as.numeric(combined_data %>% pull({{covariate}}))
  arms <- factor(combined_data %>% pull(condition))
  p_values <- oneway.test(values ~ arms, var.equal = FALSE)$p.value

  summary <- bind_cols(summary, p_value = p_values)
  return(summary)
}

# Apply the function to each covariate
results <- lapply(covariates, function(x) balance_test(combined_data, x))
# Name the list so that `.id` records the covariate name; an unnamed list
# would label the rows "1", "2", ... instead.
names(results) <- covariates
results_df <- bind_rows(results, .id = "Variable")
print(results_df)

# Cross-tabs of categorical covariates by condition (cell percentages),
# each with a p-value column added by add_p()

# Gender
tbl_cross(combined_data, row = gender, col = condition, percent = "cell") %>%
  add_p()

# Party
tbl_cross(combined_data, row = party_id, col = condition, percent = "cell") %>%
  add_p()

# Race (white indicator)
tbl_cross(combined_data, row = white, col = condition, percent = "cell") %>%
  add_p()




```

# MANUSCRIPT

# Table 4: Effect of Experimental Conditions on Election Attitudes and Perceptions

```{r, warning = FALSE}
# Covariates
# Control variables shared by every model in this chunk, stored as a single
# string that is pasted onto the right-hand side of each formula below.
covariates <- "confidence_index + electoral_record_index + voted_trump + party_scale + education + age + race_ethnicity + voted_2022 + voted_2020 + voted_2018 "

#H1a and H1b
# Full sample: post-treatment access index (m1a) and confidence index (m1b)
# regressed on condition plus the controls.
m1a <- lm(as.formula(paste("electoral_record_index_post ~ condition +", covariates)), data = combined_data)

m1b <- lm(as.formula(paste("confidence_index_post ~ condition +", covariates)), data = combined_data)

# Rows assigned to the "tool" or "pr" condition only
subset_data <-subset(combined_data, condition %in% c("tool", "pr"))

#H2a and H2b
# Same two outcomes, restricted to subset_data. These models are not passed
# to the stargazer() call below.
m2a <- lm(as.formula(paste("electoral_record_index_post ~ condition +", covariates)), data = subset_data)

m2b <- lm(as.formula(paste("confidence_index_post ~ condition +", covariates)), data = subset_data)

# R1
# Full sample: post-treatment concern index on condition plus the controls.
m3 <- lm(as.formula(paste("concern_index_post ~ condition +", covariates)), data = combined_data)

# Make table
# LaTeX regression table for m1a, m1b and m3.
# NOTE(review): covariate.labels are hard-coded and presumably applied by
# position, so they must match the fitted coefficient order exactly. The race
# labels omit "White", i.e. they assume White is the reference category; for a
# character column lm() uses the alphabetically first level as reference --
# confirm against names(coef(m1a)) before publishing.
stargazer(
  list(m1a,  m1b, m3),
  covariate.labels = c("Public Records", "Online Search Tool", "Confidence_{t-1}", "Accessibility_{t-1}", "Trump 2020", "Party", "Education", "Age", "American Indian", "Asian", "Black", "Latino", "MENA", "NHPI", "Other", "Voted 2022", "Voted 2020", "Voted 2018"),
  column.labels = c("Access", "Confidence", "Issue"),
#  column.separate = c(2, 2, 1),  # Separate Access+Confidence, Tool+PR, and Prevalence
  omit.stat = c("rsq", "f"), 
  df = FALSE,
  align = TRUE,
  digits = 2,
  no.space = TRUE,
  title = "Effect of Experimental Conditions on Election Attitudes and Perceptions",
  dep.var.labels.include = FALSE,  # Suppress default dependent variable labels
  column.sep.width = "2pt",  # Reduce the space between columns
  header = FALSE,  # Suppress the default table header
  notes = "Note: Models control for baseline values and demographic covariates.",
  type = "latex",
  # Significance stars: * p < .1, ** p < .05, *** p < .01
 star.char = c("*", "**", "***"),
          star.cutoffs = c(.1, .05, .01)
)


```


# Subgroup Treatment Effects Models

```{r message=FALSE, warning=FALSE}

# Race subgroups. A comparison against "White" is NA when race_ethnicity is
# missing, and subset() drops such rows, so they land in neither data set.
df_white <- subset(combined_data, race_ethnicity == "White")
df_nonwhite <- subset(combined_data, race_ethnicity != "White")

# Subgroup Models

# Controls for the race models (race_ethnicity itself is left out)
covariates <- "confidence_index + electoral_record_index + voted_trump + party_scale + education + age + voted_2022 + voted_2020 + voted_2018"

# Race: suffix 1 = access index outcome, suffix 2 = confidence index outcome
m_race_white1 <- lm(
  as.formula(sprintf("electoral_record_index_post ~ condition + %s", covariates)),
  data = df_white
)

m_race_nonwhite1 <- lm(
  as.formula(sprintf("electoral_record_index_post ~ condition + %s", covariates)),
  data = df_nonwhite
)

m_race_white2 <- lm(
  as.formula(sprintf("confidence_index_post ~ condition + %s", covariates)),
  data = df_white
)

m_race_nonwhite2 <- lm(
  as.formula(sprintf("confidence_index_post ~ condition + %s", covariates)),
  data = df_nonwhite
)

# Vote-propensity subgroups, split on the low_propensity_voter flag (1 / 0)
df_nonvoter <- subset(combined_data, low_propensity_voter == 1)
df_voter <- subset(combined_data, low_propensity_voter == 0)

# Vote Propensity
# Controls for these models leave out the voted_2022 / 2020 / 2018 indicators
covariates <- "confidence_index + electoral_record_index + voted_trump + party_scale + education + age + race_ethnicity"

# Suffix 1 = access index outcome, suffix 2 = confidence index outcome
m_nonvoter1 <- lm(
  as.formula(sprintf("electoral_record_index_post ~ condition + %s", covariates)),
  data = df_nonvoter
)

m_voter1 <- lm(
  as.formula(sprintf("electoral_record_index_post ~ condition + %s", covariates)),
  data = df_voter
)

m_nonvoter2 <- lm(
  as.formula(sprintf("confidence_index_post ~ condition + %s", covariates)),
  data = df_nonvoter
)

m_voter2 <- lm(
  as.formula(sprintf("confidence_index_post ~ condition + %s", covariates)),
  data = df_voter
)


# Party subgroups: party_scale >= 5 vs. party_scale < 4.
# Rows with party_scale equal to 4 (or missing) are in neither subset.
df_rep <- subset(combined_data, party_scale >= 5)
df_dem <- subset(combined_data, party_scale < 4)

# Controls for the party models (party_scale itself is left out)
covariates <- "confidence_index + electoral_record_index + voted_trump + education + age + race_ethnicity + voted_2022 + voted_2020 + voted_2018"

# Party
# Suffix 1 = access index outcome, suffix 2 = confidence index outcome

m_rep1 <- lm(
  as.formula(sprintf("electoral_record_index_post ~ condition + %s", covariates)),
  data = df_rep
)

m_dem1 <- lm(
  as.formula(sprintf("electoral_record_index_post ~ condition + %s", covariates)),
  data = df_dem
)

m_rep2 <- lm(
  as.formula(sprintf("confidence_index_post ~ condition + %s", covariates)),
  data = df_rep
)

m_dem2 <- lm(
  as.formula(sprintf("confidence_index_post ~ condition + %s", covariates)),
  data = df_dem
)

# Education

# Controls for the education models (education itself is left out)
covariates <- "confidence_index + electoral_record_index + voted_trump + party_scale + age + race_ethnicity + voted_2022 + voted_2020 + voted_2018"

# Split at the sample median of `education`: at or above vs. strictly below
education_median <- median(combined_data$education, na.rm = TRUE)
df_educ_high <- subset(combined_data, education >= education_median)
df_educ_low  <- subset(combined_data, education < education_median)

# educ1 / educ2: access index outcome (high / low education)
educ1 <- lm(
  as.formula(sprintf("electoral_record_index_post ~ condition + %s", covariates)),
  data = df_educ_high
)
educ2 <- lm(
  as.formula(sprintf("electoral_record_index_post ~ condition + %s", covariates)),
  data = df_educ_low
)
# educ3 / educ4: confidence index outcome (high / low education)
educ3 <- lm(
  as.formula(sprintf("confidence_index_post ~ condition + %s", covariates)),
  data = df_educ_high
)
educ4 <- lm(
  as.formula(sprintf("confidence_index_post ~ condition + %s", covariates)),
  data = df_educ_low
)


```

# Probability of Superiority 
```{r}
# Probability of superiority for one model term.
#
# Args:
#   model:   fitted model; coef(model) must contain `var`.
#   var:     coefficient name, e.g. "conditiontool".
#   data:    data frame holding the outcome column.
#   outcome: name (string) of the outcome column in `data`.
#
# Returns:
#   pnorm(d / sqrt(2)), where d is the coefficient divided by the SD of the
#   outcome (NAs removed). The result keeps the coefficient's name.
get_ps <- function(model, var, data, outcome) {
  effect_size <- coef(model)[var] / sd(data[[outcome]], na.rm = TRUE)
  pnorm(effect_size / sqrt(2))
}


# Probability of superiority for every model/term pair.
# Naming: `ps_<model>` uses the "conditiontool" coefficient and
# `ps_<model>_pr` uses the "conditionpr" coefficient. Each call passes the
# data set the model was fit on and the name of that model's outcome.

# Main models (full sample)
ps_m1a <- get_ps(m1a, "conditiontool", combined_data, "electoral_record_index_post")
ps_m1a_pr <- get_ps(m1a, "conditionpr", combined_data, "electoral_record_index_post")
ps_m1b <- get_ps(m1b, "conditiontool", combined_data, "confidence_index_post")
ps_m1b_pr <- get_ps(m1b, "conditionpr", combined_data, "confidence_index_post")
# Models fit on subset_data: only the "conditiontool" coefficient is used
ps_m2a <- get_ps(m2a, "conditiontool", subset_data, "electoral_record_index_post")
ps_m2b <- get_ps(m2b, "conditiontool", subset_data, "confidence_index_post")
# Race subgroups
ps_race_white1 <- get_ps(m_race_white1, "conditiontool", df_white, "electoral_record_index_post")
ps_race_white1_pr <- get_ps(m_race_white1, "conditionpr", df_white, "electoral_record_index_post")
ps_race_nonwhite1 <- get_ps(m_race_nonwhite1, "conditiontool", df_nonwhite, "electoral_record_index_post")
ps_race_nonwhite1_pr <- get_ps(m_race_nonwhite1, "conditionpr", df_nonwhite, "electoral_record_index_post")
ps_race_white2 <- get_ps(m_race_white2, "conditiontool", df_white, "confidence_index_post")
ps_race_white2_pr <- get_ps(m_race_white2, "conditionpr", df_white, "confidence_index_post")
ps_race_nonwhite2 <- get_ps(m_race_nonwhite2, "conditiontool", df_nonwhite, "confidence_index_post")
ps_race_nonwhite2_pr <- get_ps(m_race_nonwhite2, "conditionpr", df_nonwhite, "confidence_index_post")
# Vote-propensity subgroups
ps_nonvoter1 <- get_ps(m_nonvoter1, "conditiontool", df_nonvoter, "electoral_record_index_post")
ps_nonvoter1_pr <- get_ps(m_nonvoter1, "conditionpr", df_nonvoter, "electoral_record_index_post")
ps_voter1 <- get_ps(m_voter1, "conditiontool", df_voter, "electoral_record_index_post")
ps_voter1_pr <- get_ps(m_voter1, "conditionpr", df_voter, "electoral_record_index_post")
ps_nonvoter2 <- get_ps(m_nonvoter2, "conditiontool", df_nonvoter, "confidence_index_post")
ps_nonvoter2_pr <- get_ps(m_nonvoter2, "conditionpr", df_nonvoter, "confidence_index_post")
ps_voter2 <- get_ps(m_voter2, "conditiontool", df_voter, "confidence_index_post")
ps_voter2_pr <- get_ps(m_voter2, "conditionpr", df_voter, "confidence_index_post")
# Party subgroups
ps_rep1 <- get_ps(m_rep1, "conditiontool", df_rep, "electoral_record_index_post")
ps_rep1_pr <- get_ps(m_rep1, "conditionpr", df_rep, "electoral_record_index_post")
ps_dem1 <- get_ps(m_dem1, "conditiontool", df_dem, "electoral_record_index_post")
ps_dem1_pr <- get_ps(m_dem1, "conditionpr", df_dem, "electoral_record_index_post")
ps_rep2 <- get_ps(m_rep2, "conditiontool", df_rep, "confidence_index_post")
ps_rep2_pr <- get_ps(m_rep2, "conditionpr", df_rep, "confidence_index_post")
ps_dem2 <- get_ps(m_dem2, "conditiontool", df_dem, "confidence_index_post")
ps_dem2_pr <- get_ps(m_dem2, "conditionpr", df_dem, "confidence_index_post")
# Education subgroups
ps_educ1 <- get_ps(educ1, "conditiontool", df_educ_high, "electoral_record_index_post")
ps_educ1_pr <- get_ps(educ1, "conditionpr", df_educ_high, "electoral_record_index_post")
ps_educ2 <- get_ps(educ2, "conditiontool", df_educ_low, "electoral_record_index_post")
ps_educ2_pr <- get_ps(educ2, "conditionpr", df_educ_low, "electoral_record_index_post")
ps_educ3 <- get_ps(educ3, "conditiontool", df_educ_high, "confidence_index_post")
ps_educ3_pr <- get_ps(educ3, "conditionpr", df_educ_high, "confidence_index_post")
ps_educ4 <- get_ps(educ4, "conditiontool", df_educ_low, "confidence_index_post")
ps_educ4_pr <- get_ps(educ4, "conditionpr", df_educ_low, "confidence_index_post")


# Collect everything into one two-column table.
# The `model` labels and the `ps` values are two separate hand-written
# vectors: they are matched purely by position, so both must list the
# entries in exactly the same order.
ps_results <- tibble::tibble(
  model = c(
    "ps_m1a", "ps_m1a_pr", "ps_m1b", "ps_m1b_pr", "ps_m2a", "ps_m2b",
    "ps_race_white1", "ps_race_white1_pr", "ps_race_nonwhite1", "ps_race_nonwhite1_pr",
    "ps_race_white2", "ps_race_white2_pr", "ps_race_nonwhite2", "ps_race_nonwhite2_pr",
    "ps_nonvoter1", "ps_nonvoter1_pr", "ps_voter1", "ps_voter1_pr",
    "ps_nonvoter2", "ps_nonvoter2_pr", "ps_voter2", "ps_voter2_pr",
    "ps_rep1", "ps_rep1_pr", "ps_dem1", "ps_dem1_pr",
    "ps_rep2", "ps_rep2_pr", "ps_dem2", "ps_dem2_pr",
    "ps_educ1", "ps_educ1_pr", "ps_educ2", "ps_educ2_pr",
    "ps_educ3", "ps_educ3_pr", "ps_educ4", "ps_educ4_pr"
  ),
  ps = c(
    ps_m1a, ps_m1a_pr, ps_m1b, ps_m1b_pr, ps_m2a, ps_m2b,
    ps_race_white1, ps_race_white1_pr, ps_race_nonwhite1, ps_race_nonwhite1_pr,
    ps_race_white2, ps_race_white2_pr, ps_race_nonwhite2, ps_race_nonwhite2_pr,
    ps_nonvoter1, ps_nonvoter1_pr, ps_voter1, ps_voter1_pr,
    ps_nonvoter2, ps_nonvoter2_pr, ps_voter2, ps_voter2_pr,
    ps_rep1, ps_rep1_pr, ps_dem1, ps_dem1_pr,
    ps_rep2, ps_rep2_pr, ps_dem2, ps_dem2_pr,
    ps_educ1, ps_educ1_pr, ps_educ2, ps_educ2_pr,
    ps_educ3, ps_educ3_pr, ps_educ4, ps_educ4_pr
  )
)

print(ps_results)

```


# Table 5: Conditional Average Treatment Effects with FDR Adjusted P-Values

```{r message=FALSE, warning=FALSE}

# Every subgroup model, keyed by its own object name
model_list <- list(
  # Race
  m_race_white1 = m_race_white1,
  m_race_nonwhite1 = m_race_nonwhite1,
  m_race_white2 = m_race_white2,
  m_race_nonwhite2 = m_race_nonwhite2,
  # Vote propensity
  m_nonvoter1 = m_nonvoter1,
  m_voter1 = m_voter1,
  m_nonvoter2 = m_nonvoter2,
  m_voter2 = m_voter2,
  # Party
  m_rep1 = m_rep1,
  m_dem1 = m_dem1,
  m_rep2 = m_rep2,
  m_dem2 = m_dem2,
  # Education
  educ1 = educ1,
  educ2 = educ2,
  educ3 = educ3,
  educ4 = educ4
)

# Pull the coefficient table out of a fitted model.
#
# Args:
#   model: fitted model whose summary() has a coefficient matrix with the
#          columns "Estimate", "Std. Error" and "Pr(>|t|)".
#
# Returns:
#   Data frame with one row per coefficient and the columns
#   Term, Estimate, Std_Error, P_Value.
extract_model_stats <- function(model) {
  coef_table <- coef(summary(model))
  wanted <- c(
    Estimate = "Estimate",
    Std_Error = "Std. Error",
    P_Value = "Pr(>|t|)"
  )
  stat_columns <- lapply(wanted, function(column) coef_table[, column])
  data.frame(Term = rownames(coef_table), stat_columns)
}

# Extract model statistics for all models
model_stats_list <- lapply(model_list, extract_model_stats)

# Add model names to each dataframe
for (i in seq_along(model_stats_list)) {
  model_stats_list[[i]]$Model <- names(model_list)[i]
}
all_model_stats <- do.call(rbind, model_stats_list)

# Rows holding a treatment (`condition...`) coefficient
is_condition_term <- grepl("^condition", all_model_stats$Term)

# Apply the FDR (Benjamini-Hochberg) correction to the treatment-effect
# p-values ONLY. The family of hypotheses is the set of subgroup treatment
# effects; adjusting over every coefficient (intercepts, lagged outcomes,
# demographic controls) changes the number of tests and the rank of each
# p-value, which distorts the adjusted values reported for the condition terms.
# Non-condition rows get NA because they are not part of the family.
all_model_stats$Adjusted_P <- NA_real_
all_model_stats$Adjusted_P[is_condition_term] <- p.adjust(
  all_model_stats$P_Value[is_condition_term],
  method = "BH"
)

# Keep only the `condition` rows for reporting
condition_terms_df <- all_model_stats[is_condition_term, ]

# Readable labels for the condition coefficients, looked up by term name
term_labels <- c(
  conditionpr = "PR",
  conditiontool = "Tool",
  conditioncontrol = "Control"
)
condition_terms_df[["Term_Label"]] <- term_labels[condition_terms_df[["Term"]]]

# Number of observations used by each model, looked up by model name
extract_nobs <- function(model) nobs(model)
nobs_list <- vapply(model_list, extract_nobs, integer(1))

condition_terms_df[["Observations"]] <- nobs_list[condition_terms_df[["Model"]]]


# Export Model Results

# Display labels for the table, keyed by the names used in `model_list`.
# Typos fixed so the published rows are formatted consistently
# ("Outcome: Subgroup", and "Bachelor's" spelled the same way throughout).
# NOTE(review): the education rows are labelled by BA status, while the
# education models above split the sample at the median of `education` --
# confirm that the median corresponds to holding a bachelor's degree.
model_labels <- c(
  "m_race_white1" = "Access: White",
  "m_race_nonwhite1" = "Access: Non-White",
  "m_nonvoter1" = "Access: Low Propensity",
  "m_voter1" = "Access: High Propensity",
  "m_rep1" = "Access: Republican",
  "m_dem1" = "Access: Democrat",
  "educ1" = "Access: Bachelor's Degree",
  "educ2" = "Access: No BA",
  "m_race_white2" = "Confidence: White",
  "m_race_nonwhite2" = "Confidence: Non-White",
  "m_nonvoter2" = "Confidence: Low Propensity Voter",
  "m_voter2" = "Confidence: High Propensity Voter",
  "m_rep2" = "Confidence: Republican",
  "m_dem2" = "Confidence: Democrat",
  "educ3" = "Confidence: Bachelor's Degree",
  "educ4" = "Confidence: No BA"
)

# `Model` still holds the raw model names here; swap them for display labels
condition_terms_df$Model <- model_labels[condition_terms_df$Model]

# Column order of the exported table
condition_terms_df <- condition_terms_df[, c("Model", "Term_Label", "Estimate", "Std_Error", "P_Value", "Adjusted_P",  "Observations")]


# Convert dataframe to an xtable object
latex_table <- xtable(condition_terms_df,
                      caption = "Model Results: Condition Variables",
                      label = "tab:condition_results")
# Emit LaTeX (booktabs rules, no row names)
print(latex_table, include.rownames = FALSE, booktabs = TRUE)

```


# Table 6: Perceptions of Public Availability of Voting Records in North Carolina
```{r message=FALSE, warning=FALSE}

# Reaction items summarised by experimental condition, with p-values.
# NOTE(review): `summary_table` is assigned again further down in this chunk
# before it is ever displayed, so this by-condition table is never shown --
# confirm whether it should be printed or stored under its own name.
summary_table <- combined_data %>%
  select(
    condition,
    reaction_increase_transparency,
    reaction_pressure_to_vote,
    reaction_privacy_worry,
    reaction_trust_system_verify,
    reaction_distrust_privacy,
    reaction_misuse_suppression,
    reaction_neutral_public_data,
    reaction_reduce_fraud,
    reaction_surprised_public_history,
    reaction_confidence_admin_system
  ) %>%
  tbl_summary(
    by = condition,
    statistic = all_categorical() ~ "{n} ({p}%)",
    missing = "no"
  ) %>%
  add_p() %>%
  modify_header(label = "**Variable**") %>%
  modify_caption("**Comparison of Variables by Experimental Condition**")

# Row labels shown in the table, keyed by reaction column

descriptive_labels <- list(
  reaction_increase_transparency    = "Increases transparency in the election process. ",
  reaction_pressure_to_vote         = "Makes me feel more pressure to vote. ",
  reaction_privacy_worry            = "Worry about my privacy. ",
  reaction_trust_system_verify      = "More trusting because I can verify participation records. ",
  reaction_distrust_privacy         = "More distrust because of privacy concerns. ",
  reaction_misuse_suppression       = "Worry that information could be misused or lead to voter suppression. ",
  reaction_neutral_public_data      = "Neutral about the public availability of voter participation data. ",
  reaction_reduce_fraud             = "Belief that availability of this information could reduce fraud. ",
  reaction_surprised_public_history = "Surprised to learn that voting history is public. ",
  reaction_confidence_admin_system  = "Confident in the election process due to well-functioning administrative system. "
)

# Pooled summary (no split by condition); rows follow the select() order
summary_table <- combined_data %>%
  select(
    reaction_increase_transparency,
    reaction_trust_system_verify,
    reaction_reduce_fraud,
    reaction_misuse_suppression,
    reaction_confidence_admin_system,
    reaction_neutral_public_data,
    reaction_surprised_public_history,
    reaction_privacy_worry,
    reaction_distrust_privacy,
    reaction_pressure_to_vote
  ) %>%
  tbl_summary(
    statistic = all_categorical() ~ "{n} ({p}%)",
    missing = "no",
    label = descriptive_labels
  ) %>%
  modify_header(label = "**Variable**") %>%
  modify_caption("Reflecting on the fact that who votes in each election in North Carolina is public information online, which of the following statements reflect your view of the election process? Check all that apply.") %>%
  bold_labels()

# Display the table
summary_table

```

# APPENDIX C

#  C.1.1 and C.1.2 Descriptive Stats 
```{r}
# Wave 1 Universe

# `label<-` is exported by several attached packages (Hmisc, table1, xtable),
# so an unqualified call depends on the order of the library() calls. Qualify
# it with Hmisc::, the same function used for the combined data in this chunk.
Hmisc::label(data$age) <- "Age"
Hmisc::label(data$education) <- "Education Level"
Hmisc::label(data$race_ethnicity) <- "Race/Ethnicity"
Hmisc::label(data$party_scale) <- "Party Scale"
Hmisc::label(data$voted_2022) <- "Voted in 2022"
Hmisc::label(data$voted_2020) <- "Voted in 2020"
Hmisc::label(data$voted_2018) <- "Voted in 2018"

# Descriptive statistics by condition for the wave 1 list
desc_wave1 <- table1(
  ~ age + education + race_ethnicity + party_scale +
    voted_2022 + voted_2020 + voted_2018 | condition,
  data = data,
  topclass = "Rtable1-zebra"
)
desc_wave1
# save as desc_wave1.png

# Wave 1/2 Combined
# Column labels for the descriptive table, applied one column at a time
wave2_labels <- c(
  age = "Age",
  education = "Education Level",
  race_ethnicity = "Race/Ethnicity",
  party_scale = "Party Scale (7 point)",
  voted_2022 = "Voted in 2022",
  voted_2020 = "Voted in 2020",
  voted_2018 = "Voted in 2018"
)
for (col_name in names(wave2_labels)) {
  Hmisc::label(combined_data[[col_name]]) <- wave2_labels[[col_name]]
}

# Descriptive statistics by condition for the analysis sample
desc_wave2 <- table1(
  ~ age + education + race_ethnicity + party_scale +
    voted_2022 + voted_2020 + voted_2018 | condition,
  data = combined_data,
  topclass = "Rtable1-zebra"
)
desc_wave2
# save as desc_wave1and2.png

```


# C.5 Recall of Experimental Treatments

```{r}

# Table focused on Recall
# For each recall item: turn missing answers into the explicit category
# "Didn't answer", convert to a factor, and put the levels in display order.
combined_data <- combined_data %>%
  mutate(
    across(
      .cols = c(own_vote_hist, other_vote_hist, mail_vote_remind, mail_tool_prior),
      .fns = ~ fct_relevel(
        as.factor(replace(as.character(.x), is.na(.x), "Didn't answer")),
        "Yes", "No", "I'm not sure", "Didn't answer"
      )
    )
  )

# One cross-tab per recall item: item by condition, column percentages,
# with a p-value column added by add_p()
tbl1 <- add_p(
  tbl_cross(combined_data, row = own_vote_hist, col = condition, percent = "col")
)

tbl2 <- add_p(
  tbl_cross(combined_data, row = other_vote_hist, col = condition, percent = "col")
)

tbl3 <- add_p(
  tbl_cross(combined_data, row = mail_vote_remind, col = condition, percent = "col")
)

tbl4 <- add_p(
  tbl_cross(combined_data, row = mail_tool_prior, col = condition, percent = "col")
)

# Stack the four recall tables, one group header per table (same order)

final_table <- tbl_stack(
  tbls = list(tbl1, tbl2, tbl3, tbl4),
  group_header = c(
    "Looked up own history?",
    "Looked up someone else's history?",
    "Received mail reminder of prior votes?",
    "Received mail about checking prior votes?"
  )
)

# Convert to a gt table, render as LaTeX and write the code to the output
latex_code <- gt::as_latex(as_gt(final_table))

cat(latex_code)
```


# C.6: Power Analysis
```{r}

### MDE for ATE overall
# pwr.t.test() is called without an effect size, so it solves for the effect
# size `d` given n, power and significance level.

R2_cov <- 0.5      # presumably the share of outcome variance explained by covariates -- confirm
n <- 1000
power <- 0.90
sig.level <- 0.05

pwr_result <- pwr.t.test(n = n, sig.level = sig.level, power = power, type = "two.sample")

# Adjust effect size based on covariates
d_adjusted <- sqrt(1 - R2_cov) * pwr_result$d
d_adjusted

###  MDE for subgroup effects
###  Assuming smaller sample size
### Number of respondents per condition
n <- 400
pwr_result <- pwr.t.test(n = n, sig.level = sig.level, power = power, type = "two.sample")
d_adjusted <- sqrt(1 - R2_cov) * pwr_result$d
d_adjusted

# Power Analysis for  n = 2314

n <- 775
pwr_result <- pwr.t.test(n = n, sig.level = sig.level, power = power, type = "two.sample")
d_adjusted <- sqrt(1 - R2_cov) * pwr_result$d
d_adjusted

n <- 250
pwr_result <- pwr.t.test(n = n, sig.level = sig.level, power = power, type = "two.sample")
d_adjusted <- sqrt(1 - R2_cov) * pwr_result$d
d_adjusted

```


